# Start from an empty workspace, including hidden (dot-prefixed) objects.
# NOTE(review): wiping the global environment is discouraged in shared
# scripts; running the script in a fresh R session is the safer alternative.
rm(list = ls(all.names = TRUE))
# Switch every locale category to "C".
Sys.setlocale("LC_ALL", "C")
## [1] "C"
# Attach the data-wrangling and plotting packages used below.
pacman::p_load(dplyr, ggplot2, plotly)
# Restore the objects saved in rdata/Z.rdata (presumably Geo, OrdPay, Ord and
# Cust, which the rest of the script uses -- verify against the file).
load("rdata/Z.rdata")
# map_data("world") below relies on the maps package being available.
library(maps)
## Warning: package 'maps' was built under R version 3.5.3
# Polygon outline of Brazil, extracted from the world map data.
Brazil <- filter(map_data("world"), region == "Brazil")

# Base layer: Brazil drawn as a gray polygon. Further layers are added to
# this plot object later in the script.
brazilPlot <- ggplot() +
  geom_polygon(
    data = Brazil,
    mapping = aes(x = long, y = lat, group = group),
    fill = "gray"
  )
# Remove outliers: keep only geolocation records that fall inside Brazil's
# bounding box (bounds are inclusive). Extreme points in decimal degrees:
#   northernmost  5 deg 16' 27.8"  N  ->   5.27438888 (latitude)
#   southernmost 33 deg 45' 04.21" S  -> -33.75116944 (latitude)
#   westernmost  73 deg 58' 58.19" W  -> -73.98283055 (longitude)
#   easternmost  34 deg 47' 35.33" W  -> -34.79314722 (longitude)
# filter() is used instead of Geo[cond, ] because base logical subsetting
# turns every row with a missing coordinate into an all-NA row, whereas
# filter() simply drops rows whose condition is NA.
Geo <- Geo %>%
  filter(
    between(geolocation_lat, -33.75116944, 5.27438888),
    between(geolocation_lng, -73.98283055, -34.79314722)
  )
# How many distinct zip-code prefixes remain after the outlier removal.
Geo$geolocation_zip_code_prefix %>% n_distinct()
## [1] 19010
# Tabulate rows per zip prefix, then tabulate those row counts themselves;
# tail() shows the last six entries (the highest rows-per-prefix values).
Geo$geolocation_zip_code_prefix %>%
  table() %>%
  table() %>%
  tail()
## .
## 832 879 907 965 1102 1146
## 1 1 1 1 1 1
# Merge data: Customer + Order + Geolocation + OrderPayment
# One coordinate pair per zip-code prefix: the largest latitude and the
# largest longitude observed for that prefix (taken independently).
custlocation <- Geo %>%
  group_by(geolocation_zip_code_prefix) %>%
  summarise(
    custlat = max(geolocation_lat),
    custlng = max(geolocation_lng)
  )

# Order-level table: keep only the first payment row of each order, attach
# it to every order in Ord (orders without a payment row are kept), then add
# the customer record and the customer's zip-prefix coordinates.
COG <- OrdPay %>%
  distinct(order_id, .keep_all = TRUE) %>%
  right_join(Ord, by = "order_id") %>%
  left_join(Cust, by = "customer_id") %>%
  left_join(
    custlocation,
    by = c("customer_zip_code_prefix" = "geolocation_zip_code_prefix")
  )
# See where each order comes from
# Overlay one small point per order at the customer's coordinates, colored
# by customer state, on top of the Brazil base map.
g <- brazilPlot +
  geom_point(
    data = COG,
    mapping = aes(x = custlng, y = custlat, color = customer_state),
    size = 0.2
  )

# Render the ggplot as an interactive plotly widget.
ggplotly(g)